********************************************************************************
************  This program generates the main results in the paper ************* 
********************************************************************************

* Start from an empty dataset in memory
clear
* Do not pause when output fills the Results window
set more off

* Root folder of the replication package; the "..." prefix is a placeholder to be replaced with the local path.
* Paths below are built as "${root}\Data\..." and "${root}\Functions\...".
global root = "...\Replication_Package"

********** Figure 1 **********
* Plots the market-cap-weighted mean fee over time, one line per ETF category q (1-4).

* ETF
use "${root}\Data\etf_month_data", clear

* Post-launch data
keep if t >= 0
drop t 

* Yearly frequency: keep only the rows dated on the latest date observed in each year
egen max_date = max(date), by(year)
keep if max_date == date

* Unit: Bp
* NOTE(review): exp_ratio is rescaled here, but the collapse below averages exp_ratio_m, not exp_ratio.
* Confirm which fee variable is intended and that it is in the intended unit.
replace exp_ratio = exp_ratio * 10000

* Bring differentiation
* The category q of the monthly file is replaced by q4 (renamed to q); the legend below labels its values 1-4.
merge m:1 permno_etf using "${root}\Data\etf_data", keep(match) nogen
drop q
rename q4 q

* Differentiation = 1 - similarity (sim_all)
gen differentiation = 1 - sim_all

* Weighted means (analytic weights = mktcap) by date and category.
* mean_dif is computed but not used in the chart below.
collapse (mean) mean_fee = exp_ratio_m mean_dif = differentiation [aw = mktcap], by(date q)
sort q date

* Mean
twoway (line mean_fee date if q == 1, graphregion(color(white)) xtitle("") ytitle("Fee") leg(label(1 "Broad-index ETFs") cols(1) ring(0) position(11) bmargin(0.5) region(lwidth(none) fc(none)))) ///
	   (line mean_fee date if q == 2, leg(label(2 "Smart-beta ETFs"))) /// 
	   (line mean_fee date if q == 3, mc(black) leg(label(3 "Industry/sector ETFs"))) ///
	   (line mean_fee date if q == 4, leg(label(4 "Thematic ETFs"))) ///

*

********** Figure 2 **********

* Figure 2 is produced by a separate do-file, run here with its output suppressed
quietly do "${root}\Functions\fig2.do"


********** Figure 3 **********

* AUM 
* Total market capitalization per category (q) and year, measured in December.
use "${root}\Data\etf_month_data", clear

keep if month == 12
egen aum_ = sum(mktcap), by(q year)
keep year date q aum_
duplicates drop
sort q year

* Chart (aum_ divided by 10^12; one line per category, q == 0 and q == 1)
replace aum_ = aum_/1000000000000
* Use the exact variable name: the abbreviation "aum" only works while variable
* abbreviation is enabled and no other variable starts with "aum".
twoway (line aum_ year if q == 0) (line aum_ year if q == 1)


* Implied revenue 
* Sums, per category (q) and year, each ETF's yearly mean fee times its yearly mean market cap.
use "${root}\Data\etf_month_data", clear

* Average AUM and fee (per ETF and year)
egen mean_aum = mean(mktcap), by(permno_etf year)
egen mean_fee = mean(exp_ratio), by(permno_etf year)

* Implied revenue
gen rev = mean_fee*mean_aum

* One row per ETF-year: the ETF-year means are constant within the year, so December carries them
keep if month == 12

* Total implied revenue by category and year
egen rev_ = sum(rev), by(q year)
keep year date q rev_
duplicates drop
sort q year

* Rescale (divide by 10^9) and plot one line per category
replace rev_ = rev_/1000000000
twoway (line rev_ year if q == 0) (line rev_ year if q == 1)


* Number of launches 
* Counts ETFs by launch year and category (q), fills year-category cells without launches
* with zero, and draws a bar chart with one bar per category.

* Skeleton of all (year, q) cells for 1993-2019
use "${root}\Data\etf_month_data", clear
keep if month == 12
keep if year >= 1993 & year <= 2019
keep date year
duplicates drop
* Duplicate every year row once; generate() marks the originals with q = 0 and the copies with q = 1.
* This replaces a split on a hard-coded row count (27), which was only right for exactly 27 year rows.
expand 2, generate(q)
tempfile year
save `year'

* Launches per category and year
use "${root}\Data\etf_data", clear
gen year = year(launch_date)
keep if year <= 2019
drop if year == .
egen num_etf_ = count(permno_etf), by(q year)
keep num_etf_ q year
duplicates drop

* Add the empty cells from the skeleton and set their count to zero
merge m:1 year q using `year', nogen
sort q year
replace num_etf_ = 0 if num_etf_ == .

* Latex
* One row per date with columns num_etf_0 and num_etf_1
reshape wide num_etf_, i(date) j(q)

* Chart
graph bar num_etf_0 num_etf_1, over(year, lab(labsize(*1) angle(45)))  /// 
	ytitle("Number of New ETFs") ///	
	leg(label(1 "Broad-based ETFs") label(2 "Specialized ETFs"))

* Number of closures 
* Counts ETFs by delisting year and category (q), fills year-category cells without closures
* with zero, and draws a bar chart with one bar per category.

* Skeleton of all (year, q) cells for 1993-2019
use "${root}\Data\etf_month_data", clear
keep if month == 12
keep if year >= 1993 & year <= 2019
keep date year
duplicates drop
* Duplicate every year row once; generate() marks the originals with q = 0 and the copies with q = 1.
* This replaces a split on a hard-coded row count (27), which was only right for exactly 27 year rows.
expand 2, generate(q)
tempfile year
save `year'

* Closures per category and year
use "${root}\Data\etf_data", clear
gen year = year(delisting_date)
keep if year <= 2019
drop if year == .
egen num_etf_ = count(permno_etf), by(q year)
keep num_etf_ q year
duplicates drop

* Add the empty cells from the skeleton and set their count to zero
merge m:1 year q using `year', nogen
sort q year
replace num_etf_ = 0 if num_etf_ == .

* Latex
* One row per date with columns num_etf_0 and num_etf_1
reshape wide num_etf_, i(date) j(q)

* Chart
* The y-axis title previously read "Number of New ETFs", copied from the launches chart.
graph bar num_etf_0 num_etf_1, over(year, lab(labsize(*1) angle(45)))  /// 
	ytitle("Number of Closed ETFs") ///	
	leg(label(1 "Broad-Based ETFs") label(2 "Specialized ETFs"))
*


********** Table 1 **********
* Summary statistics of ETF-level variables, reported separately per category.

use "${root}\Data\etf_data", clear

* Rescale before summarizing
replace exp_ratio = exp_ratio*10000
replace turnover = turnover*100
replace aum_2019 = aum_2019/1000

* q == 0: broad-based, q == 1: specialized
local sumvars n_holdings exp_ratio turnover mkt_exret delisted aum_2019 rev_2019
forvalues g = 0/1 {
	tabstat `sumvars' if q == `g', stat(n mean sd p5 p25 p50 p75 p95) c(s)
}



********** Figure 4 **********
* Scatter of fee against product differentiation, weighted by AUM, with separate markers per category.

use "${root}\Data\etf_data", clear

* Product differentiation: 100*(1 - similarity), using sim_bb for q == 0 and sim_sp for q == 1;
* left missing where the similarity equals zero, and set to zero for ETFs launched before 1994
gen dif = (1 - sim_bb)*100 if q == 0 & sim_bb != 0
replace dif = (1 - sim_sp)*100 if q == 1 & sim_sp != 0
replace dif = 0 if launch_date < mdy(1,1,1994)

* Unit
replace exp_ratio = exp_ratio * 10000

* One chart per weighting year (2002, then 2019)
foreach yr in 2002 2019 {
	sepscatter exp_ratio dif [aw=aum_`yr'], separate(q) msymbol(circle_hollow) xtitle("Product Differentiation") ytitle("Fee")
}


********** Table 2 **********
* Flow regressions with date fixed effects and standard errors clustered by ETF and date.
* First set: fee and return rank interacted with the category dummy q.
* Second set: same, interacted with a high-media dummy, on ETFs with us_holdings >= 0.8.

use "${root}\Data\etf_month_data", clear

* Post-launch data
keep if t >= 0 & year >= 2000


* Flows
* Next-month change in mktcap net of the next-month return, relative to current mktcap;
* then transformed to 100*log(1 + flow)
bysort permno_etf (date): gen flows = (mktcap[_n+1] - mktcap*(1+ret[_n+1]))/mktcap
replace flows = log(1 + flows)
replace flows = flows*100

* Ranking by dates
* astile is not an official Stata command (presumably the community-contributed package); here it
* assigns each ETF to one of 100 return groups within each date
astile ret_rank = ret, by(date) nq(100)
drop ret

* Controls
* log(t) is missing at t == 0, so launch-month rows have missing age
gen age = log(t)
gen size = log(mktcap/1000000)
gen tvol = log(1 + vol/shrout)
replace exp_ratio = exp_ratio*10000

* Interaction terms
gen int_1 = exp_ratio*q
gen int_2 = ret_rank*q


* 2000-2019
reghdfe flows exp_ratio int_1 ret_rank int_2 q size age tvol, a(date) cluster(permno_etf date)

* 2000-2009
reghdfe flows exp_ratio int_1 ret_rank int_2 q size age tvol if date <= mdy(12,31,2009), a(date) cluster(permno_etf date)

* 2010-2019
reghdfe flows exp_ratio int_1 ret_rank int_2 q size age tvol if date > mdy(12,31,2009), a(date) cluster(permno_etf date)


* Keep US ETFs
merge m:1 permno_etf using "${root}\Data\etf_data", keep(match) nogen
keep if us_holdings >= 0.8
* NOTE(review): exp_ratio is dropped here but is referenced again below (int_1 and the regressions).
* Those references can only resolve through variable abbreviation to another variable whose name
* starts with "exp_ratio", or fail with "variable not found". Confirm which fee variable is intended.
drop exp_ratio

* Bring media exposure/sentiment
* Quintiles of etf_css_vw within date; high_media = 1 for the top quintile, 0 otherwise;
* rows without a quintile are dropped
astile media_rank = etf_css_vw, by(date) nq(5)
gen high_media = .
replace high_media = 0 if media_rank <= 4
replace high_media = 1 if media_rank > 4 & media_rank < .
drop if high_media == .

* Interaction terms
drop int_1 int_2
gen int_1 = exp_ratio * high_media
gen int_2 = ret_rank * high_media

* 2000-2019
reghdfe flows exp_ratio int_1 ret_rank int_2 high_media size age tvol, a(date) cluster(permno_etf date)

* 2000-2009
reghdfe flows exp_ratio int_1 ret_rank int_2 high_media size age tvol if date <= mdy(12,31,2009), a(date) cluster(permno_etf date)

* 2010-2019
reghdfe flows exp_ratio int_1 ret_rank int_2 high_media size age tvol if date > mdy(12,31,2009), a(date) cluster(permno_etf date)



********** Table 3 **********
* Factor regressions of value-weighted category portfolios (q == 0 and q == 1) and of the
* q == 1 minus q == 0 return difference. Panels A-C run the same steps on different samples:
*   Panel A: post-launch months, year >= 2000
*   Panel B: first 60 months after launch, year >= 2000
*   Panel C: months after the first 60; year >= 2004 because of age > 5 years requirement
local sample_A "t >= 0 & year >= 2000"
local sample_B "t >= 0 & t <= 60 & year >= 2000"
local sample_C "t > 60 & year >= 2004"

* Right-hand-side factor sets; a constant-only regression is run before these
local model_1 "mktrf"
local model_2 "mktrf smb hml"
local model_3 "mktrf smb hml umd"
local model_4 "mktrf smb hml rmw cma"
local model_5 "mktrf smb hml rmw cma umd"
local model_6 "mktrf me ia roe"

foreach panel in A B C {

	display _newline "Table 3, Panel `panel'"

	use "${root}\Data\etf_month_data", clear

	* Panel-specific sample
	keep if `sample_`panel''

	* Delisting returns
	replace ret = dlret if ret == . & dlret != .

	* VW
	* ret/(q == i) is missing outside category i (division by zero), so each asgen call only
	* sees the returns of one category; weights are mktcap_lag
	forvalues i = 0/1{
		bysort date: asgen temp_ret_`i' = (ret/(q == `i')), w(mktcap_lag)
	}
	gen ret_w = .
	forvalues i = 0/1{
		replace ret_w = temp_ret_`i' if q == `i'
	}

	* Excess returns
	gen exret_w = ret_w - rf

	* One row per category and date
	collapse (mean) year month ret_w exret_w mktrf smb hml rmw cma umd me ia roe, by(q date)

	* % scale (ret_w precedes exret_w in the variable order and is left unscaled)
	foreach var of varlist exret_w-roe{
		replace `var' = `var' * 100
	}

	* Category portfolios
	forvalues i = 0/1{
		reg exret_w if q == `i'
		forvalues s = 1/6{
			reg exret_w `model_`s'' if q == `i'
		}
	}

	* High minus low
	* Order rows by q within date explicitly so that, on the q == 1 row, ret_w[_n-1] is the
	* q == 0 return of the same date (missing if that date has no q == 0 row)
	keep if q == 0 | q == 1
	bysort date (q): gen exret = ret_w - ret_w[_n-1]
	keep if q == 1
	replace exret = exret*100

	reg exret
	forvalues s = 1/6{
		reg exret `model_`s''
	}
}


********** Figure 5 **********
* Cumulative abnormal return (car) with upper/lower bands around the launch date.

quietly do "${root}\Functions\fig5.do"

* One chart per input file and category, in this order:
*   file 1 ("pre"):  1 broad-index, 2 smart-beta, 3 industry/sector, 4 thematic
*   file 2 ("post"): 1 broad-index, 2 smart-beta, 3 industry/sector, 4 thematic
forvalues f = 1/2 {
	forvalues k = 1/4 {

		insheet using "${root}\Data\fig_indexed_performance_`f'.csv", names clear

		twoway (line car`k' t, lc(b) lp(solid) lw(medthick) leg(label(1 "All ETFs") cols(1) ring(0) position(8) bmargin(0.5) region(lwidth(none) fc(none)))) ///
		       (line upper`k' t, lc(b) lp(dot) lw(medthick) leg(label(2 ""))) ///
		       (line lower`k' t, lc(b) lp(dot) lw(medthick) leg(label(3 "")) ///
			   xtitle("Trading Months Relative to Launch Date") ytitle("CAR FFC4 (%)") ///
			   xlab(-36(12)60))

	}
}

********** Table 4 **********
* Factor regressions of exret_w for each portfolio group q = 1,...,5.

use "${root}\Data\hedging_portfolios", clear

forvalues i = 1/5 {
	* Same sequence of factor sets for every group
	foreach rhs in "mktrf" "mktrf smb hml" "mktrf smb hml umd" "mktrf smb hml rmw cma" "mktrf smb hml rmw cma umd" "mktrf me ia roe" {
		reg exret_w `rhs' if q == `i'
	}
}


********** Table 5 **********
* Regressions of an inflow indicator on category (q), log age and their interaction.

use "${root}\Data\etf_month_data", clear

* Post-launch data
keep if t >= 0 & year >= 2000

* Flows
* Change in mktcap net of the current return, relative to last month's mktcap; rounded to 4 decimals
replace ret = 0 if ret == .
bysort permno_etf (date): gen flows = (mktcap - mktcap[_n-1]*(1+ret))/mktcap[_n-1]
replace flows = round(flows,0.0001)

* Recode to an indicator: 1 = non-negative flow, 0 = negative flow.
* Missing compares as larger than any number in Stata, so "flows >= 0" alone would also code
* missing flows (e.g. each ETF's first month, which has no previous mktcap) as 1.
* Missing flows are therefore excluded explicitly and stay missing.
replace flows = 1 if flows >= 0 & !missing(flows)
replace flows = 0 if flows < 0

* Controls
* log(t) is missing at t == 0
gen age = log(t)
gen interaction = q * age
* Constant used as a trivial "fixed effect" so that reghdfe can be run without date effects
gen a = 1

* Regression
reghdfe flows q age interaction, a(a) cluster(permno_etf date)
reghdfe flows q age interaction, a(date) cluster(permno_etf date)

* First 60 months only
reghdfe flows q age interaction if t <= 60, a(a) cluster(permno_etf date)
reghdfe flows q age interaction if t <= 60, a(date) cluster(permno_etf date)


********** Figure 6 **********
* (This header previously read "Figure 5", which is already used above; the sections that follow
* are Table 6 and Figure 7.)

* (a)
* Local polynomial fit of demeaned flows on market-adjusted returns for ETFs in their first 60 months.

use "${root}\Data\etf_month_data", clear

* Post-launch data
keep if t >= 0 & year >= 2000

* Flows
* Next-month change in mktcap net of the next-month return, relative to current mktcap
bysort permno_etf (date): gen flows = (mktcap[_n+1] - mktcap*(1+ret[_n+1]))/mktcap

* Winsorize (1st/99th percentile within year; winsor2 is not an official Stata command)
winsor2 flows, by(year) cut(1 99) replace

* Demean flows (by year)
egen mean_flows = mean(flows), by(year)
replace flows = flows - mean_flows

* Market-adjusted returns: subtract rf and mktrf
replace ret = ret - rf - mktrf

* New ETFs
keep if t <= 60

* Broad-based
* preserve/restore discards the variables x, s and se created by gen() and se(), so the second
* lpoly call can create them again
preserve
lpoly flows ret if q == 0 & ret >= -0.15 & ret <= 0.15, gen(x s) se(se) bwidth(0.04) noscatter ci lineopts(lcolor(black) lwidth(medium)) ciopts(recast(rline) lcolor(black) lwidth(medium) lpattern(dash)) ytitle("") xtitle(Rm - Rf) title(" ") legend(off)  xline(0, lcolor(black))  xlabel(-.15(.05).15, format(%3.2f)) legend(off) graphregion(fcolor(white))  ylabel(, angle(horizontal) format(%4.3f) nogrid) note("")  xtitle(, margin(medsmall))  graphregion(lcolor(white))

* Specialized
restore
lpoly flows ret if q == 1 & ret >= -0.15 & ret <= 0.15, gen(x s) se(se) bwidth(0.04) noscatter ci lineopts(lcolor(black) lwidth(medium)) ciopts(recast(rline) lcolor(black) lwidth(medium) lpattern(dash)) ytitle("") xtitle(Rm - Rf) title(" ") legend(off)  xline(0, lcolor(black))  xlabel(-.15(.05).15, format(%3.2f)) legend(off) graphregion(fcolor(white))  ylabel(, angle(horizontal) format(%4.3f) nogrid) note("")  xtitle(, margin(medsmall))  graphregion(lcolor(white))

* (b)
* Share of ETFs already delisted at each month t = 1,...,60 after launch, per category (q).

* The dataset is addressed through ${root} like every other input of this file; a bare
* "Data\etf_data" only resolves when the working directory happens to be the package root.
use "${root}\Data\etf_data", clear
keep permno_etf q launch_date delisting_date

* Month of delisting date (months from launch to delisting; missing when there is no delisting date)
gen dif = ((delisting_date - launch_date)/365)*12

* Create panel of ETFs: 60 rows per row of etf_data, numbered t = 1,...,60.
* expand replaces a loop that rebuilt and re-saved the whole panel once per ETF.
gen long etf_row = _n
expand 60
bysort etf_row: gen t = _n

* Delisted once t exceeds the delisting month; "t > dif" is false when dif is missing
gen delisted = 0
replace delisted = 1 if t > dif

* Ratio of delisted ETFs, per category
gen num_etf = 1
gen num_delisted = delisted
collapse (sum) num_delisted num_etf, by(q t)
gen rdlst = num_delisted/num_etf

* Chart
twoway (line rdlst t if q == 0) (line rdlst t if q == 1)



* (c)
* Local polynomial fit of the delisting indicator on the ETF's average quarterly return.

use "${root}\Data\etf_month_data", clear

* Post-launch data
keep if t >= 0 & year >= 2000

* Make data quarterly
* qret compounds the current and the two previous monthly returns; the last available month of
* each calendar quarter is kept. The full variable name permno_etf is used instead of the
* abbreviation "permno".
bysort permno_etf (date): gen qret = exp( log(1+ret) + log(1+ret[_n-1]) + log(1+ret[_n-2])) - 1
gen cal_quarter = qofd(date)
bysort permno_etf cal_quarter (date): keep if _n==_N
drop if qret == .

* ETF-level data
* Average the quarterly return (qret). The previous code computed qret but then averaged the
* monthly ret, which contradicts the "Average Quarterly Return" axis title. The result is stored
* under the name ret so the lpoly commands below are unchanged.
* The delisting indicator is max(delisted), stored as delist so that lpoly no longer depends on
* "delist" being read as an abbreviation of "delisted". A row-level delist variable that the
* collapse discarded unused is no longer generated.
collapse (mean) ret = qret (max) delist = delisted q, by(permno_etf)

* Broad-based
* preserve/restore discards x, s and se created by lpoly so the second call can create them again
preserve
lpoly delist ret if q == 0, gen(x s) se(se) bwidth(0.04) noscatter ci lineopts(lcolor(black) lwidth(medium)) ciopts(recast(rline) lcolor(black) lwidth(medium) lpattern(dash)) ytitle("P(Liquidation)") xtitle(Average Quarterly Return) title(" ") legend(off)  xline(0, lcolor(black))  xlabel(-.10(.05).05, format(%3.2f)) ylabel(0.10(.10).80, format(%3.2f)) legend(off) graphregion(fcolor(white))  ylabel(, angle(horizontal) format(%4.3f) nogrid) note("")  xtitle(, margin(medsmall))  graphregion(lcolor(white))

* Specialized
restore
lpoly delist ret if q == 1, gen(x s) se(se) bwidth(0.04) noscatter ci lineopts(lcolor(black) lwidth(medium)) ciopts(recast(rline) lcolor(black) lwidth(medium) lpattern(dash)) ytitle("P(Liquidation)") xtitle(Average Quarterly Return) title(" ") legend(off)  xline(0, lcolor(black))  xlabel(-.10(.05).05, format(%3.2f)) ylabel(0.10(.10).80, format(%3.2f)) legend(off) graphregion(fcolor(white))  ylabel(, angle(horizontal) format(%4.3f) nogrid) note("")  xtitle(, margin(medsmall))  graphregion(lcolor(white))



********** Table 6 **********
* Pre-launch characteristics (months -23 to -6) per ETF, compared between categories.

use "${root}\Data\etf_month_data", clear
keep if t > -24 & t <= -6

* Bring ETF-level variables
merge m:1 permno_etf using "${root}\Data\etf_data", keep(match) nogen

* Require us holdings >= 80%
keep if us_holdings >= 0.8

* % scale
replace etf_exret_vw = etf_exret_vw*100

* Collapse by ETF level
collapse (mean) launch_date q etf_exret_vw-etf_unprof_vw, by(permno_etf)

* Broad-based (q == 0) and specialized (q == 1) ETF indicators
gen bb = (q == 0)
gen sp = (q == 1)

* Cluster for standard errors: launch year-month
gen year = year(launch_date)
gen month = month(launch_date)
gen yearmo = year*100 + month 

* Characteristics reported in the table
local chars etf_exret_vw etf_skew etf_size_vw etf_mb_vw etf_psale_vw etf_evebitda_vw etf_sir_vw etf_n_vw etf_css_vw etf_sue_vw etf_unprof_vw

* Mean and t-stat (one regression per indicator, no constant, clustered by launch year-month)
foreach v of local chars {
	reg `v' i.bb, cluster(yearmo) nocons
	reg `v' i.sp, cluster(yearmo) nocons
}

* t-test (unequal variances) between the two groups defined by bb
foreach v of local chars {
	ttest `v', by(bb) uneq
}




********** Figure 7 **********
* Average market-to-book (etf_mb_vw) and news score (etf_css_vw) by month relative to launch
* (t = -24,...,24) and category (q), with +/- 1.96 standard-error bands.

* Empty results file that the loop appends to
clear
gen t = .

tempfile sentiment
save `sentiment'

* Iterate along calendar number around launch
forvalues i = -24/24{

	* Month-by-month
	* Read from the Data folder like every other use of etf_month_data in this file
	* (this call previously pointed to "${root}\temp\etf_month_data").
	use "${root}\Data\etf_month_data" if t == `i', clear

	* EW
	egen ew_mb = mean(etf_mb_vw), by(q date)
	egen ew_news = mean(etf_css_vw), by(q date)

	* Collapse
	collapse ew_mb ew_news, by(q date)
	
	tempfile temp
	save `temp'
	
	* Iterate along the two categories (q == 0 and q == 1)
	forvalues j = 0/1{
		
		use `temp', clear
		
		* EW: a constant-only regression gives the mean across dates and its standard error
		reg ew_mb if q == `j'
		local avg_mb = _b[_cons]
		local se_mb = _se[_cons]
		
		reg ew_news if q == `j'
		local avg_news = _b[_cons]
		local se_news = _se[_cons]
		
		* One result row for (t, q), stacked on top of the rows collected so far
		clear
		set obs 1
		gen t = `i'
		gen q = `j'
		gen avg_mb = `avg_mb'
		gen se_mb = `se_mb'
		gen avg_news = `avg_news'
		gen se_news = `se_news'
		
		append using `sentiment'
		save `sentiment', replace

	}
}


use `sentiment', clear

gen upper_mb = avg_mb + 1.96*se_mb
gen lower_mb = avg_mb - 1.96*se_mb

gen upper_news = avg_news + 1.96*se_news
gen lower_news = avg_news - 1.96*se_news

* Market-to-book
twoway (line avg_mb t if q == 0, lc(b) lp(shortdash) lw(medthick) leg(label(1 "Broad-Based ETFs") cols(1) ring(0) position(8) bmargin(0.5) region(lwidth(none) fc(none)))) ///
	   (line avg_mb t if q == 1, lc(b) lp(solid) lw(medthick) leg(label(2 "Specialized ETFs"))) ///
       (line upper_mb t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(3 ""))) ///
       (line lower_mb t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(4 ""))) ///
       (line upper_mb t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(5 ""))) ///
       (line lower_mb t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(6 "")) ///
	   xtitle("Trading Months Relative to Launch Date") ytitle("Market-to-book") ///
	   xlab(-24(12)24))

	  	   
* News
* The y-axis title previously read "Market-to-book", copied from the chart above.
twoway (line avg_news t if q == 0, lc(b) lp(shortdash) lw(medthick) leg(label(1 "Broad-Based ETFs") cols(1) ring(0) position(8) bmargin(0.5) region(lwidth(none) fc(none)))) ///
	   (line avg_news t if q == 1, lc(b) lp(solid) lw(medthick) leg(label(2 "Specialized ETFs"))) ///
       (line upper_news t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(3 ""))) ///
       (line lower_news t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(4 ""))) ///
       (line upper_news t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(5 ""))) ///
       (line lower_news t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(6 "")) ///
	   xtitle("Trading Months Relative to Launch Date") ytitle("News Sentiment") ///
	   xlab(-24(12)24))



********** Figure 8 **********
* Four-factor regressions of value-weighted portfolios of new ETFs (first 60 months), split by
* category (q) and by low/high pre-launch "sentiment". The analysis runs twice, measuring
* sentiment first with etf_exret_vw (Return) and then with etf_css_vw (Media sentiment).

foreach sentvar in etf_exret_vw etf_css_vw {

	* Pre-launch window (months -23 to -6): ETF-level average of the sentiment measure
	use "${root}\Data\etf_month_data", clear
	keep if t > -24 & t <= -6

	* Collapse by ETF level
	collapse (mean) `sentvar', by(permno_etf)
	rename `sentvar' sentiment

	tempfile sentiment
	save `sentiment'

	use "${root}\Data\etf_month_data", clear

	* Post-launch data
	keep if t >= 0 & year >= 2000

	* Delisting returns
	replace ret = dlret if ret == . & dlret != .

	* Bring sentiment
	merge m:1 permno_etf using `sentiment', keep(match) nogen

	* Sentiment groups: two groups within category and date, recoded to 0 (low) and 1 (high)
	astile q2 = sentiment, by(q date) nq(2)
	replace q2 = q2 - 1

	* Keep new ETFs
	keep if t <= 60

	* EW
	egen ret_e = mean(ret), by(q q2 date)

	* VW
	* The ratio is missing outside cell (i, j) (division by zero), so each asgen call only sees
	* the returns of one cell; weights are mktcap_lag
	gen ret_w = .
	forvalues i = 0/1{
		forvalues j = 0/1{
			bysort date: asgen temp_ret_`i'_`j' = (ret/(q == `i' & q2 == `j')), w(mktcap_lag)
			replace ret_w = temp_ret_`i'_`j' if q == `i' & q2 == `j'
		}
	}

	* Excess returns
	gen exret_e = ret_e - rf
	gen exret_w = ret_w - rf

	collapse (mean) year month ret_e ret_w exret_e exret_w rf mktrf smb hml rmw cma umd me ia roe, by(date q q2)

	* % scale
	foreach var of varlist ret_e-roe{
		replace `var' = `var' * 100
	}

	* Regressions in the order: broad-based low, broad-based high, specialized low, specialized high
	forvalues i = 0/1{
		forvalues j = 0/1{
			reg exret_w mktrf smb hml umd if q == `i' & q2 == `j'
		}
	}
}





********** Figure 9 **********


* LTG
* Average etf_ltg_vw by quarter relative to launch (-8,...,8) and category (q), with +/- 1.96
* standard-error bands.

* Empty results file that the loop appends to
clear
gen t = .

tempfile ltg
save `ltg'

* Iterate along calendar number around launch
forvalues i = -8/8{

	* Quarter i covers months (i-1)*3 < t <= i*3
	use "${root}\Data\etf_month_data" if t > (`i'-1)*3 & t <= `i'*3, clear

	* EW
	egen ew = mean(etf_ltg_vw), by(q date)

	* Collapse
	collapse ew, by(q date)

	tempfile temp
	save `temp'
	
	* Iterate along the two categories (q == 0 and q == 1)
	forvalues j = 0/1{
		
		* Short pause (100 ms) before re-reading the temporary file
		sleep 100
		use `temp', clear
		
		* EW: a constant-only regression gives the mean across dates and its standard error
		reg ew if q == `j'
		local avg = _b[_cons]
		local se = _se[_cons]

		* One result row for (t, q), stacked on top of the rows collected so far
		clear
		set obs 1
		gen t = `i'
		gen q = `j'
		gen avg = `avg'
		gen se = `se'

		append using `ltg'
		save `ltg', replace

	}
}


use `ltg', clear

gen upper = avg + 1.96*se
gen lower = avg - 1.96*se

* The bands are referenced by their full names (upper, lower) instead of the abbreviations u and
* l, which only resolve while variable abbreviation is enabled and no other variable starts
* with the same letter.
twoway (line avg t if q == 0, lc(b) lp(shortdash) lw(medthick) leg(label(1 "Broad-Based ETFs") cols(1) ring(0) position(8) bmargin(0.5) region(lwidth(none) fc(none)))) ///
	   (line avg t if q == 1, lc(b) lp(solid) lw(medthick) leg(label(2 "Specialized ETFs"))) ///
       (line upper t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(3 ""))) ///
       (line lower t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(4 ""))) ///
       (line upper t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(5 ""))) ///
       (line lower t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(6 "")) ///
	   xtitle("Trading Quarters Relative to Launch Date") ytitle("LTG") ///
	   xlab(-8(4)8))
*


* Forecast errors
* Average etf_fe (times 100) by quarter relative to launch (0,...,8) and category (q), with
* +/- 1.96 robust standard-error bands.

* Empty results file that the loop appends to
clear
gen t = .

tempfile fe
save `fe'


* Iterate along calendar number around launch
forvalues i = 0/8{

	* Quarter i covers months (i-1)*3 < t <= i*3
	use "${root}\Data\etf_month_data" if t > (`i'-1)*3 & t <= `i'*3, clear

	* EW
	egen ew = mean(etf_fe), by(q date)
	replace ew = ew*100

	* Collapse
	collapse ew, by(q date)

	tempfile temp
	save `temp'
	
	* Iterate along the two categories (q == 0 and q == 1)
	forvalues j = 0/1{
		
		use `temp', clear
		
		* EW: a constant-only regression gives the mean across dates and its robust standard error
		reg ew if q == `j', robust
		local avg = _b[_cons]
		local se = _se[_cons]

		* One result row for (t, q), stacked on top of the rows collected so far
		clear
		set obs 1
		gen t = `i'
		gen q = `j'
		gen avg = `avg'
		gen se = `se'

		append using `fe'
		save `fe', replace

	}
}


use `fe', clear

gen upper = avg + 1.96*se
gen lower = avg - 1.96*se

* The bands are referenced by their full names (upper, lower) instead of the abbreviations u and
* l, which only resolve while variable abbreviation is enabled and no other variable starts
* with the same letter.
twoway (line avg t if q == 0, lc(b) lp(shortdash) lw(medthick) leg(label(1 "Broad-Based ETFs") cols(1) ring(0) position(8) bmargin(0.5) region(lwidth(none) fc(none)))) ///
	   (line avg t if q == 1, lc(b) lp(solid) lw(medthick) leg(label(2 "Specialized ETFs"))) ///
       (line upper t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(3 ""))) ///
       (line lower t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(4 ""))) ///
       (line upper t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(5 ""))) ///
       (line lower t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(6 "")) ///
	   xtitle("Trading Quarters Relative to Launch Date") ytitle("Forecast Error") ///
	   xlab(0(4)8))
   	



********** Figure 10 **********
* Distribution of ior and of users_holding (scaled by mktcap) over each ETF's first year, per category.

use "${root}\Data\etf_month_data", clear

* First year
keep if t >= 0 & t <= 12

* Users per million of mktcap
replace users_holding = users_holding/(mktcap/1000000)

* Collapse at ETF level
collapse (mean) ior users_holding q, by(permno_etf)

* Summary statistics: for each variable, q == 0 first and then q == 1
foreach v in ior users_holding {
	forvalues g = 0/1 {
		tabstat `v' if q == `g', stat(n mean sd p5 p25 p50 p75 p95) c(s)
	}
}




********** Figure 11 **********

* Underlying
* Average etf_users_vw by month relative to launch (-12,...,12) and category (q), with +/- 1.96
* standard-error bands.

* Empty results file that the loop appends to
clear
gen t = .

tempfile underlying
save `underlying'

* Iterate along calendar number around launch
forvalues i = -12/12{

	* Month-by-month
	use "${root}\Data\etf_month_data" if t == `i', clear

	* EW
	egen ew = mean(etf_users_vw), by(q date)

	* Collapse
	collapse ew, by(q date)

	tempfile temp
	save `temp'
	
	* Iterate along the two categories (q == 0 and q == 1)
	forvalues j = 0/1{
		
		use `temp', clear
		
		* EW: a constant-only regression gives the mean across dates and its standard error
		reg ew if q == `j'
		local avg = _b[_cons]
		local se = _se[_cons]

		* One result row for (t, q), stacked on top of the rows collected so far
		clear
		set obs 1
		gen t = `i'
		gen q = `j'
		gen avg = `avg'
		gen se = `se'

		append using `underlying'
		save `underlying', replace

	}
}


use `underlying', clear

gen upper = avg + 1.96*se
gen lower = avg - 1.96*se

* The bands are referenced by their full names (upper, lower) instead of the abbreviations u and
* l, which only resolve while variable abbreviation is enabled and no other variable starts
* with the same letter.
twoway (line avg t if q == 0, lc(b) lp(shortdash) lw(medthick) leg(label(1 "Broad-Based ETFs") cols(1) ring(0) position(8) bmargin(0.5) region(lwidth(none) fc(none)))) ///
	   (line avg t if q == 1, lc(b) lp(solid) lw(medthick) leg(label(2 "Specialized ETFs"))) ///
       (line upper t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(3 ""))) ///
       (line lower t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(4 ""))) ///
       (line upper t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(5 ""))) ///
       (line lower t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(6 "")) ///
	   xtitle("Trading Months Relative to Launch Date") ytitle("Robinhood Users (#)") ///
	   xlab(-12(12)12))

* ETF
* Average users_holding by month since launch (1,...,12) and category (q), with +/- 1.96
* standard-error bands, plus a zero origin point at t = 0.

* Empty results file that the loop appends to
clear
gen t = .

tempfile etf
save `etf'

* Iterate along calendar number around launch
forvalues i = 1/12{

	* Month-by-month
	* Read from the Data folder like every other use of etf_month_data in this file
	* (this call previously pointed to "${root}\temp\etf_month_data").
	use "${root}\Data\etf_month_data" if t == `i', clear

	* EW
	egen ew = mean(users_holding), by(q date)

	* Collapse
	collapse ew, by(q date)

	tempfile temp
	save `temp'
	
	* Iterate along the two categories (q == 0 and q == 1)
	forvalues j = 0/1{
		
		use `temp', clear
		
		* EW: a constant-only regression gives the mean across dates and its standard error
		reg ew if q == `j'
		local avg = _b[_cons]
		local se = _se[_cons]

		* One result row for (t, q), stacked on top of the rows collected so far
		clear
		set obs 1
		gen t = `i'
		gen q = `j'
		gen avg = `avg'
		gen se = `se'

		append using `etf'
		save `etf', replace

	}
}


use `etf', clear

* Add one origin row per category at t = 0 with avg = 0 and se = 0.
* The new rows are located relative to _N instead of the hard-coded observation numbers 25 and
* 26, which were only right when the loop produced exactly 24 rows.
local first = _N + 1
local last = _N + 2
set obs `last'
replace q = 0 in `first'
replace q = 1 in `last'
replace t = 0 in `first'/`last'
replace avg = 0 in `first'/`last'
replace se = 0 in `first'/`last'

gen upper = avg + 1.96*se
gen lower = avg - 1.96*se

* The bands are referenced by their full names (upper, lower) instead of the abbreviations u and
* l, which only resolve while variable abbreviation is enabled and no other variable starts
* with the same letter.
twoway (line avg t if q == 0, lc(b) lp(shortdash) lw(medthick) leg(label(1 "Broad-Based ETFs") cols(1) ring(0) position(8) bmargin(0.5) region(lwidth(none) fc(none)))) ///
	   (line avg t if q == 1, lc(b) lp(solid) lw(medthick) leg(label(2 "Specialized ETFs"))) ///
       (line upper t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(3 ""))) ///
       (line lower t if q == 0, lc(b) lp(dot) lw(medthick) leg(label(4 ""))) ///
       (line upper t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(5 ""))) ///
       (line lower t if q == 1, lc(b) lp(dot) lw(medthick) leg(label(6 "")) ///
	   xtitle("Trading Months Relative to Launch Date") ytitle("Robinhood users (#)") ///
	   xlab(0(12)12))

* 




